Test Queries: Added self-join test cases. Also added Vladimir's method, which
requires the following code to be run first:

begin work;

create function var_set (text, text) returns text as $$
    select set_config('public.' || $2 || pg_backend_pid(), $1, false);
$$ LANGUAGE 'sql';

create function var_get (text) returns text as $$
    select current_setting('public.' || $1 || pg_backend_pid());
$$ LANGUAGE 'sql';

create operator >>> (procedure = var_set, leftarg = text, rightarg = text);
create operator <<< (procedure = var_get, rightarg = text);

commit;

The above code exploits custom GUC variables to carry the lagged value from
one row to the next. Clever, but dirty :-)
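For reference, a minimal sketch of how the two operators behave (the variable
name 'demo' is arbitrary): >>> stores its left argument in a session-local
custom GUC variable named on the right and returns it, and <<< reads the
variable back. This assumes custom_variable_classes = 'public' is set in
postgresql.conf, as noted in the tests below.

select 'hello' >>> 'demo';  -- stores 'hello' in public.demo<pid>, returns 'hello'
select <<<'demo';           -- reads it back: 'hello'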
----------------------------------------------------------------------------
-- Test 1
--
--
-- Notes: Get the name, department and salary of the highest paid member of
--        each department. If the two highest paid employees in a department
--        have the same salary, show the one with the lowest id.
----------------------------------------------------------------------------

create table employees (
  id INT primary key,
  name varchar(30) not null,
  department varchar(30) not null,
  salary int not null,
  check (salary >= 0)
);

insert into employees values(1,'Jeff','IT',10000);
insert into employees values(2,'Sam','IT',12000);
insert into employees values(3,'Richard','Manager',30000);
insert into employees values(4,'Ian','Manager',20000);
insert into employees values(5,'John','IT',60000);
insert into employees values(6,'Matthew','Director',60000);

VACUUM ANALYZE employees;

-- Normal way.
SELECT name,department,salary
FROM employees AS e
WHERE id = (SELECT id
            FROM employees
            WHERE department = e.department
            ORDER BY salary DESC,id ASC
            LIMIT 1);

-- above query = 498 tps

-- With windowing functions.
SELECT name,department,salary
FROM (SELECT name,
             department,
             salary,
             row_number() OVER (PARTITION BY department
                                ORDER BY salary DESC,id ASC) AS num
      FROM employees
     ) AS t
WHERE num = 1;

-- above query = 578 tps
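-- As a sanity check, both queries above should return one row per department
-- for the sample data (the outer queries have no ORDER BY, so row order may vary):
--
--    name   | department | salary
--  ---------+------------+--------
--   John    | IT         |  60000
--   Richard | Manager    |  30000
--   Matthew | Director   |  60000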
----------------------------------------------------------------------------
-- Test 2 -- Split times problem
--
-- Notes: Find the interval of time between each timestamp and the previous
--        timestamp, the previous timestamp being the one with the next
--        lowest id value. If there is no previous timestamp, show NULL.

CREATE TABLE tstest (
  id SERIAL NOT NULL PRIMARY KEY,
  timestamp TIMESTAMP NOT NULL
);

-- Seed one row, then repeatedly add a row 0-100 seconds after the latest one.
INSERT INTO tstest (timestamp) VALUES(NOW());
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;

VACUUM ANALYZE tstest;

-- Sub query
SELECT timestamp - (SELECT timestamp
                    FROM tstest
                    WHERE id < t.id
                    ORDER BY id DESC
                    LIMIT 1)
FROM tstest t;

-- above query = 336 tps

-- Self join (breaks requirements! -> requires a gapless sequence of ids)
SELECT t1.timestamp - t2.timestamp
FROM tstest t1
LEFT OUTER JOIN tstest t2 ON t2.id + 1 = t1.id;

-- above query = 424 tps

-- Vladimir's method.
select '' >>> 'lag';

select timestamp::timestamp - (case lag when '' then null else lag end)::timestamp
from (select <<<'lag' as lag,
             timestamp::text >>>'lag' as timestamp
      from tstest
      order by id
     ) as t;

-- above query = 182.78 tps

-- With windowing functions.
SELECT timestamp - LAG(timestamp,1) OVER (ORDER BY id)
FROM tstest;

-- above query = 481 tps
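-- Aside (not part of the timed tests): LAG also takes an optional third
-- argument giving a default to return when there is no previous row, and the
-- default is evaluated at the current row. The requirement above asks for
-- NULL, but if a zero interval were wanted for the first row instead,
-- something like this should work:
SELECT timestamp - LAG(timestamp, 1, timestamp) OVER (ORDER BY id)
FROM tstest;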
------------------------------------------------------------------------------------------
-- Test 3 -- Meter reading problem
--
-- Notes: Some testing with slightly larger tables.
--        Find the day where the least units were used. This will be the smallest
--        difference between that day's reading and the previous meter reading. Note
--        that the previous reading does not have to be from the previous day; it's
--        possible that the meter was not read on some days.

CREATE TABLE meter_readings (
  date DATE NOT NULL PRIMARY KEY,
  reading DOUBLE PRECISION NOT NULL
);

-- Create an initial meter reading of 0 for 1st Jan 2000.
INSERT INTO meter_readings (date,reading) VALUES('2000-01-01',0);

-- Create a reading for each of the 10000 days after 2000-01-01.
INSERT INTO meter_readings (date,reading)
SELECT '2000-01-01'::date + i.a,i.a * 10000 + RANDOM() * 10000
FROM generate_series(1,10000) AS i(a);

VACUUM ANALYZE meter_readings;

-- Sub query test
SELECT date,reading - (SELECT reading
                       FROM meter_readings
                       WHERE date < p.date
                       ORDER BY date DESC
                       LIMIT 1) AS used
FROM meter_readings p
ORDER BY used ASC NULLS LAST
LIMIT 1;

-- above query = 1.3 tps.

-- Self join (breaks requirements! requires the meter to be read daily!!)
SELECT t1.date,t1.reading - t2.reading AS used
FROM meter_readings t1
LEFT OUTER JOIN meter_readings t2 ON t2.date + 1 = t1.date
ORDER BY used ASC NULLS LAST
LIMIT 1;

-- above query = 7.59 tps

-- Vladimir's method.
-- edit postgresql.conf -> custom_variable_classes = 'public'
select '' >>> 'lag';

select date,
       reading::numeric - (case lag when '' then null else lag end)::numeric as used
from (select date,
             <<<'lag' as lag,
             reading::text >>>'lag' as reading
      from meter_readings
      order by date
     ) as t
order by used asc nulls last
limit 1;

-- above query = 1.9 tps

-- Windowing functions
SELECT date,reading - LAG(reading,1) OVER (ORDER BY date) AS used
FROM meter_readings
ORDER BY used ASC NULLS LAST
LIMIT 1;

-- above query = 8.45 tps.

/*
   Notes: There seems to be a small issue with the above query.

   david=# explain SELECT date,reading - LAG(reading,1) OVER (ORDER BY date) AS used
   david-# FROM meter_readings
   david-# ORDER BY used ASC NULLS LAST LIMIT 1;
                                            QUERY PLAN
   ---------------------------------------------------------------------------------------------
    Limit  (cost=989.49..989.49 rows=1 width=12)
      ->  Sort  (cost=989.49..1014.49 rows=10001 width=12)
            Sort Key: ((reading - lag(reading, (1))))
            ->  Window  (cost=814.47..939.48 rows=10001 width=12)
                  ->  Sort  (cost=814.47..839.47 rows=10001 width=12)
                        Sort Key: date
                        ->  Seq Scan on meter_readings  (cost=0.00..150.01 rows=10001 width=12)

   The planner uses a seq scan on meter_readings, then performs a sort on date.
   Even if I do SET enable_seqscan = off; the planner still performs a seq scan.
   Why can't the executor use an index scan on the primary key?
*/

------------------------------------------------------------------------------------------
-- Test 4 -- Marathon results problem
--
-- Notes: Show the runnerid, name, time taken and position of each marathon runner.

CREATE TABLE marathon_results (
  runnerid SERIAL NOT NULL PRIMARY KEY,
  name VARCHAR(30) NOT NULL,
  time INTERVAL NOT NULL
);

INSERT INTO marathon_results (name,time) VALUES('Samuel','02:05:24');
INSERT INTO marathon_results (name,time) VALUES('Paul','02:04:55');
INSERT INTO marathon_results (name,time) VALUES('Haile','02:03:59');
INSERT INTO marathon_results (name,time) VALUES('Martin','02:05:15');
INSERT INTO marathon_results (name,time) VALUES('Sammy','02:04:56');
INSERT INTO marathon_results (name,time) VALUES('David','03:15:56');
INSERT INTO marathon_results (name,time) VALUES('Eric','03:15:56');
INSERT INTO marathon_results (name,time) VALUES('Alan','03:24:56');

VACUUM ANALYZE marathon_results;

-- Sub query
SELECT runnerid,name,time,(SELECT COUNT(DISTINCT time)+1
                           FROM marathon_results
                           WHERE time < r.time) AS position
FROM marathon_results r
ORDER BY position;

-- above query = 424 tps

-- Self join
SELECT t1.runnerid,t1.name,t1.time,COUNT(DISTINCT t2.time) + 1 AS position
FROM marathon_results t1
LEFT OUTER JOIN marathon_results t2 ON t2.time < t1.time
GROUP BY t1.runnerid,t1.name,t1.time
ORDER BY position;

-- above query = 361 tps

-- Vladimir's method.
-- edit postgresql.conf -> custom_variable_classes = 'public'
-- init values
select ''>>>'prev_time', '0'>>>'dense_rank';

select runnerid,name,time,position
from (select (((case when time::text = <<<'prev_time' then 0 else 1 end)
               + (<<<'dense_rank')::int4)::text>>>'dense_rank')::int4 as position,
             runnerid,
             time,
             time::text>>>'prev_time',
             name
      from (SELECT * FROM marathon_results ORDER BY time) m
     ) results
ORDER BY position;

-- above query = 182 tps

-- With windowing functions.
SELECT runnerid,name,time,DENSE_RANK() OVER (ORDER BY time ASC) AS position
FROM marathon_results
ORDER BY position;

-- above query = 629 tps
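-- As a sanity check, all four methods above should agree on the following
-- ranking for the sample data. Note how DENSE_RANK handles the tie: David and
-- Eric share position 6 and Alan follows at 7 with no gap (rows tied on time
-- may appear in either order):
--
--  runnerid |  name  |   time   | position
-- ----------+--------+----------+----------
--         3 | Haile  | 02:03:59 |        1
--         2 | Paul   | 02:04:55 |        2
--         5 | Sammy  | 02:04:56 |        3
--         4 | Martin | 02:05:15 |        4
--         1 | Samuel | 02:05:24 |        5
--         6 | David  | 03:15:56 |        6
--         7 | Eric   | 03:15:56 |        6
--         8 | Alan   | 03:24:56 |        7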
------------------------------------------------------------------------------------------
-- Test 5 -- Big marathon problem
--
-- Notes: As above, but with a bigger table.

CREATE TABLE big_marathon (
  runnerid SERIAL NOT NULL PRIMARY KEY,
  time INTERVAL NOT NULL
);

-- Generate 10000 random times from 2 hrs to 4 hrs.
INSERT INTO big_marathon (time)
SELECT '03:00:00'::interval + (CAST(RANDOM() * 3600 AS INT) || ' secs')::INTERVAL
                            - (CAST(RANDOM() * 3600 AS INT) || ' secs')::INTERVAL
FROM generate_series(1,10000);

-- Give the non-windowing queries a chance.
CREATE INDEX big_marathon_time_idx ON big_marathon (time);

VACUUM ANALYZE big_marathon;

-- Who was in Nth place? (using 2nd place here, but it could be anything)

-- Sub query
SELECT runnerid,time,position
FROM (SELECT runnerid,time,(SELECT COUNT(DISTINCT time)+1
                            FROM big_marathon
                            WHERE time < r.time) AS position
      FROM big_marathon r
     ) results
WHERE position = 2;

-- above query = 404851.763 ms (6 mins 44 secs)

-- Self join (big_marathon has no name column, so only runnerid and time are selected)
SELECT t.runnerid,t.time,t.position
FROM (SELECT t1.runnerid,t1.time,COUNT(DISTINCT t2.time) + 1 AS position
      FROM big_marathon t1
      LEFT OUTER JOIN big_marathon t2 ON t2.time < t1.time
      GROUP BY t1.runnerid,t1.time
     ) t
WHERE t.position = 2;

/*
   ERROR:  could not write block 645447 of temporary file: No space left on device
   HINT:  Perhaps out of disk space?

   Perhaps not, but it was REALLY slow anyway.
*/

-- Vladimir's method.
-- edit postgresql.conf -> custom_variable_classes = 'public'
-- init values
select ''>>>'prev_time', '0'>>>'dense_rank';

select runnerid,time,position
from (select (((case when time::text = <<<'prev_time' then 0 else 1 end)
               + (<<<'dense_rank')::int4)::text>>>'dense_rank')::int4 as position,
             runnerid,
             time,
             time::text>>>'prev_time'
      from big_marathon
      order by time
     ) results
where position = 2;

-- above query = 617 ms

-- With windowing functions.
SELECT runnerid,time,position
FROM (SELECT runnerid,time,DENSE_RANK() OVER (ORDER BY time ASC) AS position
      FROM big_marathon
     ) results
WHERE position = 2;

-- above query = 115.932 ms

-- Notes: The windowing function method did not use the index on the time column.

------------------------------------------------------------------------------------------
-- Test 6 -- Stock problem
--
-- Notes: We have a stock table which holds the stock level for each product, and an
--        orders table which contains orders for products and their due dates. We need
--        to know how much stock we have for each order. Stock must be allocated to the
--        order with the earliest due date first; if two orders have the same due date,
--        the one with the lowest orderid takes the stock first.
--        We need to see a list of orders with the orderid, partnum, duedate, orderqty
--        and the remaining stock after the order is fulfilled. If the remaining stock
--        is less than zero we show the negative value rather than 0, to indicate how
--        much stock needs to be bought or made.

CREATE TABLE stock (
  partnum VARCHAR(16) NOT NULL PRIMARY KEY,
  stockqty INT NOT NULL,
  CHECK (stockqty > 0)
);

CREATE TABLE orders (
  orderid SERIAL NOT NULL PRIMARY KEY,
  partnum VARCHAR(16) NOT NULL,
  duedate DATE NOT NULL,
  orderqty INT NOT NULL,
  CHECK (orderqty > 0)
);

INSERT INTO stock VALUES('CPU',1000);
INSERT INTO stock VALUES('HARDDRIVE',2000);
INSERT INTO stock VALUES('KEYBOARD',4000);
INSERT INTO stock VALUES('MOUSE',9000);
INSERT INTO stock VALUES('MONITOR',100);

INSERT INTO orders (partnum,duedate,orderqty) VALUES('CPU','2008-12-22', 700);
INSERT INTO orders (partnum,duedate,orderqty) VALUES('CPU','2008-11-15', 600);
INSERT INTO orders (partnum,duedate,orderqty) VALUES('HARDDRIVE','2008-11-05', 1200);
INSERT INTO orders (partnum,duedate,orderqty) VALUES('HARDDRIVE','2008-11-17', 200);
INSERT INTO orders (partnum,duedate,orderqty) VALUES('KEYBOARD','2008-11-17', 1000);
INSERT INTO orders (partnum,duedate,orderqty) VALUES('KEYBOARD','2008-12-03', 3000);
INSERT INTO orders (partnum,duedate,orderqty) VALUES('MONITOR','2008-12-03', 50);
INSERT INTO orders (partnum,duedate,orderqty) VALUES('MONITOR','2008-12-03', 60);

VACUUM ANALYZE stock;
VACUUM ANALYZE orders;

-- Sub query
SELECT t.orderid,
       t.partnum,
       t.duedate,
       t.orderqty,
       st.stockqty - t.orderqty - COALESCE((SELECT SUM(orderqty)
                                            FROM orders
                                            WHERE partnum = t.partnum
                                            AND (duedate < t.duedate
                                                 OR (duedate = t.duedate
                                                     AND orderid < t.orderid))
                                           ),0) AS remaining
FROM orders t
INNER JOIN stock st ON t.partnum = st.partnum
ORDER BY t.partnum ASC,t.duedate ASC,t.orderid ASC;

-- above query = 253 tps.

-- With windowing functions
SELECT o.orderid,
       o.partnum,
       o.duedate,
       o.orderqty,
       s.stockqty - SUM(o.orderqty) OVER (PARTITION BY o.partnum
                                          ORDER BY o.duedate ASC,o.orderid ASC) AS remaining
FROM orders o
INNER JOIN stock s ON o.partnum = s.partnum
ORDER BY o.partnum ASC,o.duedate ASC,o.orderid ASC;

-- above query = 305 tps.
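-- As a sanity check, both queries above should produce the following for the
-- sample data. MOUSE has no orders, so the inner join drops it, and the two
-- MONITOR rows share a due date, demonstrating the orderid tie-break:
--
--  orderid |  partnum  |  duedate   | orderqty | remaining
-- ---------+-----------+------------+----------+-----------
--        2 | CPU       | 2008-11-15 |      600 |       400
--        1 | CPU       | 2008-12-22 |      700 |      -300
--        3 | HARDDRIVE | 2008-11-05 |     1200 |       800
--        4 | HARDDRIVE | 2008-11-17 |      200 |       600
--        5 | KEYBOARD  | 2008-11-17 |     1000 |      3000
--        6 | KEYBOARD  | 2008-12-03 |     3000 |         0
--        7 | MONITOR   | 2008-12-03 |       50 |        50
--        8 | MONITOR   | 2008-12-03 |       60 |       -10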